library('dplyr')
library('dada2')
library('tidyverse')
library('shiny')
library('phyloseq')
library('Biostrings')
library('tibble')
library('hrbrthemes')
library('microbiomeutilities')
library('ggplot2')
library('microViz')
library('markdown')
library('microbiome')
library('ggtext')
library('patchwork')
library('ggpubr')
library('ggrepel')
library('knitr')
library('tibble')
library('decontam')
library('DESeq2')
library('dendextend')
library('vegan')
library('ggforce')
library('corncob')
threads <- 6
## Registered S3 method overwritten by 'ggside':
##   method from   
##   +.gg   ggplot2
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## Registered S3 method overwritten by 'ggtree':
##   method      from 
##   identify.gg ggfun

cpn60 Reference Database

After downloading the cpn60 refdb classifier, I added known input sequences to fasta and taxonomy files and saved as: cpn60_v11.seqs.fasta.

# Import taxonomy
qiime tools import --type 'FeatureData[Taxonomy]' --input-format HeaderlessTSVTaxonomyFormat --input-path cpn60_v11_taxonomy_table.txt --output-path cpn60_v11_taxonomy_table.qza

# Import ref seqs
qiime tools import --type 'FeatureData[Sequence]' --input-path cpn60_v11_seqs.fasta --output-path cpn60_v11_seqs.qza

# Train the classifier
qiime feature-classifier fit-classifier-naive-bayes --i-reference-reads cpn60_v11_seqs.qza --i-reference-taxonomy cpn60_v11_taxonomy_table.qza --o-classifier cpn60_classifier_v11.qza

# Trained classifier from paper ref'ed above with known input taxa added is saved as: /202310/cpn60_classifier_v11.qza

Filter and Trim Reads

Primer Sequences:

  • H279 <- ‘GAIIIIGCIGGIGAYGGIACIACIAC’
  • H279.rc <- ‘GTNGTNGTNCCNTCNCCNGCNNNNTC’
  • H1612 <- ‘GAIIIIGCIGGYGACGGYACSACSAC’
  • H1612.rc <- ‘GTNGTNGTNCCGTCNCCNGCNNNNTC’
  • H280 <- ‘YKIYKITCICCRAAICCIGGIGCYTT’
  • H280.rc <- ‘AANGCNCCNGGNTTNGGNGANNNNNN’
  • H1613 <- ‘CGRCGRTCRCCGAAGCCSGGIGCCTT’
  • H1613.rc <- ‘AAGGCNCCNGGCTTCGGNGANCGNCG’
# Load in a list of all F reads (only using F reads for hsp60 samples, this is totally fine)
fnFs <- sort(list.files('./euprymna_fastq', pattern='_R1_001.fastq', full.names = TRUE))
# Check file names
fnFs

# Assign file names for the filtered fastq files
sample.names <- c("BacMixA", "BacMixB", "ES114A", "ES114B", "EsApoA", "EsApoB", "kitcon", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B", "SymES114A", "SymES114B", "SymES114VentA", "SymES114VentB", "SymWtA", "SymWtB", "SymWtC", "VfMixA", "VfMixB")
sample.names

euprymna.hsp60.metadata <- read.delim("euprymna_hsp60_metadata.csv", header=TRUE, sep=",")

# Check quality and filter
plotQualityProfile(fnFs[1:2])

# assign file names for filtered fastq files
filtFs <- file.path("euprymna_hsp60_filtered", paste0(sample.names, "_F_filt.fastq.gz"))
names(filtFs) <- sample.names

# trimLeft comes from hsp60 primer length (all = 26 bp)
out <- filterAndTrim(fwd=fnFs, filt=filtFs,
                     trimLeft=26,
                     maxN=0,
                     maxEE=2,
                     compress=TRUE, verbose=TRUE, multithread=TRUE, rm.phix=TRUE)
head(out)

# make sure a reasonable number of reads were filtered (ideally retains >50k reads/sample-ish)

# Learn the error rates: look at how trimming affects the quality reports
timestamp()
errF <- learnErrors(filtFs, multithread=threads)
saveRDS(errF, file='errF_sampled.rds')
plotFerr <- plotErrors(errF, nominalQ=T)
suppressWarnings(print(plotFerr))

DADA2 Processing

timestamp()
dadaF <- dada(filtFs, err=errF, multithread=threads, pool=FALSE)
dadaF[[1]]

# 296 sequence variants were inferred from 39500 input unique sequences

# Here you would typically merge reads, but I'm only using F reads so no merging is necessary
# Dereplication is done automatically by dada2 when given file names (which I did) so no additional dereplication step is necessary

# Make an ASV table!
seqtab <- makeSequenceTable(dadaF)
class(seqtab) # matrix array
dim(seqtab) # 23 6171
# ^ There were 6171 ASVs identified across 23 samples

# inspect the distribution of sequence lengths
table(nchar(getSequences(seqtab))) # 274 6171
# ^ all 6171 ASVs were 274 bp, which is expected because of the trim parameters used

# remove chimeras
seqtab.nochim.es <- removeBimeraDenovo(seqtab, method = "consensus", multithread = threads, verbose = TRUE)
  # identified 110 bimeras out of 6171 input sequences
dim(seqtab.nochim.es) # 23 6061
sum(seqtab.nochim.es)/sum(seqtab) # 0.9692
# ^ After removing chimeric ASVs, we're left with 6061 ASVs (96.92% of ASVs were non-chimeric)

Table 1. Read Processing Summary

# track how many reads were lost during each step of processing:
getN <- function(x) sum(getUniques(x))

summary_tab <- data.frame(row.names=sample.names, 
                          "DADA2 Input"=out[,1],
                          "Filtered"=out[,2], 
                          "Denoised"=sapply(dadaF, getN),
                          "Final Read Count"=rowSums(seqtab.nochim.es),
                          "Percent Reads Retained"=round(rowSums(seqtab.nochim.es)/out[,1]*100, 1))

# save summary table as a file
write.table(summary_tab, "euprymna_hsp60_reads_tracked.tsv", quote=FALSE, sep="\t", col.names = NA)

# write unique seqs to a .fasta file to import into qiime2
uniquesToFasta(seqtab.nochim.es, fout='euprymna-hsp60-repseqs.fasta', ids=colnames(seqtab.nochim.es))

# write ASV table (seqtab.nochim) to a .txt file to import into QIIME2
write.table(t(seqtab.nochim.es), "euprymna-hsp60-seqtab.txt", sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)

# assign taxonomy in QIIME2 using the pre-trained classifier created on 3/29/2023
Table 1. Read Processing Summary.
Number of total reads per sample retained at each major read processing step.
DADA2.Input Filtered Denoised Final.Read.Count Percent.Reads.Retained
BacMixA 187727 144816 143560 134514 71.7
BacMixB 243557 185214 183127 169495 69.6
ES114A 213942 147339 146562 146562 68.5
ES114B 236500 188218 186530 186530 78.9
EsApoA 105387 59908 50130 47953 45.5
EsApoB 125013 74261 61726 59218 47.4
kitcon 109487 34443 33242 28694 26.2
MB13A 244337 195270 192878 192802 78.9
MB15A 193187 151865 151043 140966 73.0
MB15B 226335 181097 179617 157443 69.6
MB211A 238219 187994 184194 183945 77.2
MB211B 279112 220305 215882 215786 77.3
MB212A 220679 173520 170165 169809 76.9
MB212B 165352 127792 124365 124252 75.1
SymES114A 145488 98491 90008 85945 59.1
SymES114B 194096 116828 114422 108783 56.0
SymES114VentA 219768 132066 118801 112053 51.0
SymES114VentB 211779 159358 154424 153062 72.3
SymWtA 239707 155598 145987 140516 58.6
SymWtB 173729 119739 109516 105946 61.0
SymWtC 269185 195194 184417 180470 67.0
VfMixA 155317 123316 122424 120974 77.9
VfMixB 225861 178305 177472 175113 77.5

Assign Taxonomy Using QIIME2

# import unique ASV sequences into QIIME2
qiime tools import --type 'FeatureData[Sequence]' --input-path euprymna-hsp60-repseqs.fasta --output-path euprymna-hsp60-repseqs.qza

# assign taxonomy to ASVs using pre-trained cpn60 classifier
qiime feature-classifier classify-hybrid-vsearch-sklearn \
--i-query euprymna-hsp60-repseqs.qza \
--i-reference-reads cpn60_v11_seqs.qza \
--i-reference-taxonomy cpn60_v11_taxonomy_table.qza \
--i-classifier cpn60_classifier_v11.qza \
--p-no-prefilter \
--p-maxaccepts all \
--p-maxrejects all \
--p-confidence 0.6 \
--o-classification euprymna_hsp60_classified

# export output files back out of QIIME2
qiime tools export \
--input-path euprymna_hsp60_classified.qza \
--output-path euprymna_hsp60_classified

# add a special header for BIOM in the feature-table:
echo -n "#OTU Table" | cat - euprymna-hsp60-seqtab.txt > euprymna-hsp60-biom-table.txt

# convert to BIOM v2.1:
biom convert -i euprymna-hsp60-biom-table.txt -o euprymna-hsp60.biom --table-type="OTU table" --to-hdf5

# open taxonomy tsv and change headers to "#OTUID taxonomy  confidence", and save a copy as taxonomy.txt

# now updated table.biom file with that biom-taxonomy info:
biom add-metadata -i euprymna-hsp60.biom -o euprymna-hsp60-wtax.biom --observation-metadata-fp taxonomy.txt --sc-separated taxonomy

# then go import the biom-with-taxonomy in R

Import BIOM into Phyloseq

# save ASV sequences as a list "asv.seqs"
asv.seqs <- colnames(seqtab.nochim.es)

# format headers to name your ASV sequences in a fasta file
asv.headers <- vector(dim(seqtab.nochim.es)[2], mode="character")
for (i in 1:dim(seqtab.nochim.es)[2]) {
  asv.headers[i] <- paste(">ASV", i, sep="")
}

# write out a FASTA of final ASV seqs:
asv.fasta <- c(rbind(asv.headers, asv.seqs))
write(asv.fasta, "euprymna-hsp60-ASVs.fasta")

# write out a count table with ASV-IDs instead of ASV-sequences as names:
asv.tab <- t(seqtab.nochim.es)
row.names(asv.tab) <- sub(">", "", asv.headers)
write.table(asv.tab, "euprymna-hsp60-ASVcounts.tsv", sep="\t", quote=F, col.names=NA)

# import euprymna-hsp60 biom as phyloseq object
es.hsp <- import_biom("euprymna-hsp60-wtax.biom")
taxa_names(es.hsp) <- rownames(asv.tab)
tax_table(es.hsp)[1:5]
otu_table(es.hsp)[1:5]

# rename columns in seqtab.nochim.es with sample name (from filt file name)
rownames <- rownames(seqtab.nochim.es)

#use sub() to rename row names
new.rownames <- sub("_.*$", "", rownames)

#print the new row names
print(new.rownames)

sample_names(es.hsp) <- new.rownames
rownames(seqtab.nochim.es) <- new.rownames
sample_data(es.hsp) <- es.hsp.meta
sample_variables(es.hsp)

ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species")
colnames(tax_table(es.hsp))
colnames(tax_table(es.hsp)) <- ranks
tax_table(es.hsp)[1:5]

#format to best hit, do this before you assign "unclassified" labeling!
es.hsp <- format_to_besthit(es.hsp)
view(tax_table(es.hsp))

Contamination Analysis

Run decontam to identify contaminating sequencing based on frequency and prevalence in negative extraction control samples. These ASVs are then pruned from the dataset.

# identify contaminants based on frequency
# "dnaquant": DNA concentration for each sample, used to normalize frequency of ASVs
contamdf.freq.es.hsp <- isContaminant(es.hsp, method="frequency", conc="dnaquant")
head(contamdf.freq.es.hsp)
table(contamdf.freq.es.hsp$contaminant) # identified 21 contaminants
head(which(contamdf.freq.es.hsp$contaminant))

# identify contaminants based on prevalence in negative extraction controls (NEC)
sample_data(es.hsp)$is.neg <- sample_data(es.hsp)$sample.type == "kitcon"
contamdf.prev.es.hsp <- isContaminant(es.hsp, method="prevalence", neg="is.neg")
table(contamdf.prev.es.hsp$contaminant) # identified 7 contaminants
head(which(contamdf.prev.es.hsp$contaminant))
x <- which(contamdf.prev.es.hsp$contaminant)
y <- which(contamdf.freq.es.hsp$contaminant)

# prune contaminant ASVs identified by decontam
contam.asvs.es.hsp <- c(x,y)
contam.asvs.es.hsp
contam.asvs.es.hsp[1:28]=paste0("ASV", contam.asvs.es.hsp[1:28])
contam.asvs.es.hsp

pop.taxa = function(es.hsp, contam.asvs.es.hsp){
  allTaxa = taxa_names(es.hsp)
  allTaxa <- allTaxa[!(allTaxa %in% contam.asvs.es.hsp)]
  return(prune_taxa(allTaxa, es.hsp))
}
es.hsp = pop.taxa(es.hsp, contam.asvs.es.hsp)
es.hsp # 6033 taxa and 23 samples
# ^ 6061 non-chimeric ASVs - 28 contaminating ASVs = 6033 ASVs

# add reads per sample to phyloseq object metadata
sample_data(es.hsp)$reads.sample <- readcount(es.hsp)
sample_data(es.hsp)

Prep Data for Visualization

Sample Metadata

euprymna.hsp60.metadata <- read.csv("es-hsp metadata.csv")
euprymna.hsp60.metadata <- column_to_rownames(euprymna.hsp60.metadata, var="X")
sample_data(es.hsp) <- euprymna.hsp60.metadata
sample_data(es.hsp)

Format to Best Hit

es.hsp <- format_to_besthit(es.hsp)
# ^ This appends the best taxonomic hit (e.g. species name) to the ASV # ID

Rename Taxa

es.hsp.tx <- as.data.frame(tax_table(es.hsp))
colnames(es.hsp.tx) = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "best_hit")
head(es.hsp.tx)
head(tax_table(es.hsp))
# ^ Just adding the appropriate column headers to our tax table

es.hsp.tx$Family[es.hsp.tx$Phylum == "k__Bacteria"] <- "Unidentified Bacteria"
es.hsp.tx$Family[es.hsp.tx$Domain == "Unassigned"] <- "Unassigned at Kingdom Level"
unique(es.hsp.tx$Family)
# ^ Any ASVs that were not assigned taxonomy past the Phylum level will be labeled "k__Bacteria" at all other taxonomic levels, and ASVs that were not assigned taxonomy past the Domain level will be labeled "Unassigned" at all other taxonomic levels. This is just renaming those labels as "Unidentified Bacteria" and "Unassigned at Kingdom Level", respectively.

es.hsp.tx <- as.matrix(es.hsp.tx)
tax_table(es.hsp) <- es.hsp.tx
tax_table(es.hsp)[1:5,1:5]
# ^ Adding these updated taxonomic labels to our phyloseq object

Figure 1. Observed Culture

Define Figure 1 Sample Order

phyloseq::sample_names(es.hsp)
es.sample.order <- c("kitcon", "ES114A", "ES114B", "VfMixA", "VfMixB", "BacMixA", "BacMixB", "EsApoA", "EsApoB", "SymES114VentA", "SymES114VentB", "SymES114A", "SymES114B", "SymWtA", "SymWtB", "SymWtC", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B")

Create Figure 1 Palette

figure2_pal <- c("Aliivibrio fischeri ES114" = "#734f5a",
           "Aliivibrio fischeri MB13B1" = "#264653",
           "Aliivibrio fischeri MB13B2" = "#2a9d8f",
           "Rossellomorea aquimaris TF-12" = "#f4a261",
           "Pseudoalteromonas luteoviolacea HI1" = "#e76f51",
           "Vibrio litoralis DSM17657" = "#941c2f",
           "Other Species" = "#d5bdaf")
tax_palette_plot(figure2_pal)

Culture Samples

# Subset only Bacterial Culture samples
es.hsp.culture <- es.hsp %>%
  subset_samples(sample.type == "bacterial culture")

# Extract ASV data (abundance counts)
culture.asv.data <- otu_table(es.hsp.culture)

# Extract taxonomy data
culture.tax.data <- tax_table(es.hsp.culture)

# Convert ASV data to data frame and filter out ASVs not present in any culture samples
df.culture.asv <- as.data.frame(culture.asv.data)
# Sum the counts for each ASV and filter out those with zero counts across all samples
asv_sums <- rowSums(df.culture.asv)
df.culture.asv <- df.culture.asv[asv_sums > 0, ]

# Make sure to filter the taxonomy data to match the filtered ASV data
df.culture.tax <- as.data.frame(culture.tax.data)
df.culture.tax <- df.culture.tax[asv_sums > 0, ]

# Convert back to otu_table and tax_table
filtered_culture.asv.data <- otu_table(as.matrix(df.culture.asv), taxa_are_rows = TRUE)
filtered_culture.tax.data <- tax_table(as.matrix(df.culture.tax))

# Recreate the phyloseq object with the filtered data
es.hsp.culture.filtered <- phyloseq(filtered_culture.asv.data, filtered_culture.tax.data, sample_data(es.hsp.culture))

# Check the result
print(es.hsp.culture.filtered)

Plot Culture Samples

# Subset culture samples
sample_data(es.hsp.culture)$treatment <- factor(sample_data(es.hsp.culture)$treatment, levels = c("ES114 culture", "mixed V. fischeri culture", "mixed bacteria culture"))

fig1a <- es.hsp.culture %>%
  tax_transform("compositional", chain=TRUE)

fig1a <- comp_barplot(fig1a,
                         "Species",
                         n_taxa = 6,
                         tax_order=c("Aliivibrio fischeri ES114", 
                                     "Aliivibrio fischeri MB13B1", 
                                     "Aliivibrio fischeri MB13B2", 
                                     "Rossellomorea aquimaris TF-12", 
                                     "Pseudoalteromonas luteoviolacea HI1", 
                                     "Vibrio litoralis DSM17657"),
                         merge_other=FALSE,
                         bar_width = 0.975,
                         other_name = "Other Species",
                         palette = figure2_pal
                         ) +
  facet_col(~ treatment, scales="free_y") +
  labs(y = "Relative Abundance",
       title = "Observed Strains",
       fill = "Input Strains") +
  scale_y_percent(limits = c(0, 1.05)) +
  theme_biome_utils() +
  theme(legend.text = element_text(face = "italic", size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        axis.title.y = element_text(size = 9),
        axis.title.x = element_text(size = 9),
        strip.text = element_text(hjust = 0.5, face = "bold"),
        plot.title = element_text(size = 10, hjust = 0, face = "bold"),
        legend.title = element_text(size = 10),
        legend.position = "left",
        strip.background = element_blank(),
        panel.border = element_blank(),
        legend.key.width = unit(0.3, "cm"), 
        legend.key.height = unit(0.3, "cm")) +
  coord_flip()

Figure 1A

Save Culture Samples Plot

pdf("fig1a.pdf", width=7, height=7)
fig1a + 
  plot_layout(heights = c(2, 2, 2), ncol = 1) + 
  theme(plot.margin = unit(c(1, 1, 0, 1), "cm"))  # Top, Right, Bottom, Left
dev.off()

Figure 1. Hsp60 Tree

hsp60 Multiple Sequence Alignment

library(msa)
library(ggtree)
library(phangorn)
library(ape)

vfhsp60 <- readDNAStringSet("hsp60_ncbi_vf.fasta")
names(vfhsp60) <- make.unique(names(vfhsp60))
# ^ This should already be true, but just making sure all sequences have unique names

# Perform multiple sequence alignment using ClustalW (you can choose other tools like ClustalOmega or MUSCLE)
alignment <- msa(vfhsp60, method = "ClustalW")

# Convert the alignment to a DNAStringSet object
aligned_seqs <- as(alignment, "DNAStringSet")

# Convert the DNAStringSet to a matrix suitable for phylogenetic analysis
alignment_matrix <- as.matrix(aligned_seqs)

# Convert the alignment matrix to a phyDat object
alignment_phyDat <- phyDat(alignment_matrix, type = "DNA")

# Create a distance matrix
dist_matrix <- dist.ml(alignment_phyDat)

hsp60 Neighbor-Joining Tree

Figure 1B

Save hsp60 Tree Plot

pdf("fig1b.pdf", height = 20, width = 5)
tree
dev.off()

Figure 2. Vibrio fischeri Abundance in Squid Samples

Rename Taxa Without Strain Names

# Look at current naming scheme
head(tax_table(es.hsp)[, "Species"])

# Function to remove strain names from taxa names
remove_strain_names <- function(taxa_name) {
  # Use gsub to remove anything after the second part of the name
  gsub("(\\w+\\s\\w+).*", "\\1", taxa_name)
}

# Duplicate es.hsp phyloseq object so the original naming scheme still exists
es.hsp.2 <- es.hsp

# Apply the function to all taxa names in the phyloseq object
tax_table(es.hsp.2) <- apply(tax_table(es.hsp.2), 2, function(x) remove_strain_names(x))

# Example to check if it worked (assuming tax_table has "Species" as a column)
head(tax_table(es.hsp.2)[, "Species"])

Calculate Percent V. fischeri Abundance in Squid

# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.2)
tax_data <- tax_table(es.hsp.2)

# Identify which taxa belong to "Aliivibrio fischeri"
fischeri_indices <- which(tax_data[, "Species"] == "Aliivibrio fischeri")

# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)

# Calculate the abundance of Aliivibrio fischeri in each sample
fischeri_abundance <- colSums(otu_data[fischeri_indices, ])

# Calculate as a percentage
fischeri_percentage <- (fischeri_abundance / total_abundance) * 100

# Access the sample_data of the phyloseq object
es.hsp.meta <- sample_data(es.hsp.2)

# Add the fischeri_percentage as a new column
es.hsp.meta$fischeri_percentage <- fischeri_percentage

# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.2) <- es.hsp.meta
head(sample_data(es.hsp.2))

# Extract the fischeri_percentage from the sample_data
fischeri_percent_data <- sample_data(es.hsp.2)$fischeri_percentage

# Create a new label in the es.hsp sample data that concatenates the sample name and the Aliivibrio fischeri percentage (rather than trying to put the Aliivibrio fischeri percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.2), sprintf(" (%.2f%%)", fischeri_percent_data))

sample_data(es.hsp.2)$FischeriLabel <- paste(sample_data(es.hsp.2)$SAMPLE, new_labels)

Define Figure 2A Sample Order

phyloseq::sample_names(es.hsp.2)
##  [1] "BacMixA"       "BacMixB"       "ES114A"        "ES114B"       
##  [5] "EsApoA"        "EsApoB"        "kitcon"        "MB13A"        
##  [9] "MB15A"         "MB15B"         "MB211A"        "MB211B"       
## [13] "MB212A"        "MB212B"        "SymES114A"     "SymES114B"    
## [17] "SymES114VentA" "SymES114VentB" "SymWtA"        "SymWtB"       
## [21] "SymWtC"        "VfMixA"        "VfMixB"
fig2a_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentB", "SymES114VentA", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")

Create Figure 2A Palette

fig2a_palette <- c("Aliivibrio fischeri" = "#104911",
                    "Vibrio (Genus)" = "lightblue",
                    "Other Taxa" = "#d5bdaf")
tax_palette_plot(fig2a_palette)

Plot V. fischeri Relative Abundance

# Use es.hsp.2 for species-level taxa
# Subset sample types of interest: juvenile squid, juvenile squid ventate, adult squid
es.hsp_filtered <- subset_samples(es.hsp.2, sample.type %in% c("juvenile squid", "juvenile squid ventate", "adult squid"))

# Perform compositional transformation (relative abundance)
es.hsp_filtered <- es.hsp_filtered %>%
  tax_transform("compositional", chain = TRUE)

# Ensure the tax_table is in the correct format and modify it to label all species within the "Vibrio" genus
tax_table_temp <- as.data.frame(tax_table(es.hsp_filtered))
tax_table_temp$Species <- ifelse(tax_table_temp$Genus == "Vibrio", "Vibrio (Genus)", tax_table_temp$Species)
tax_table(es.hsp_filtered) <- tax_table(as.matrix(tax_table_temp))

# Create the bar plot with n_taxa = 2 for "Aliivibrio fischeri" and the "Vibrio" genus
fig2a <- comp_barplot(es.hsp_filtered, "Species",
  n_taxa = 2, # Only highlight 2 taxa
  label = "FischeriLabel", # sample name + percent Vf
  sample_order = fig2a_order,
  merge_other = TRUE, # Merge other taxa into "Other Species"
  bar_width = 0.975,
  palette = fig2a_palette,
  group_by = "treatment",
  other_name = "Other Taxa",
  tax_order = c("Aliivibrio fischeri", "Vibrio (Genus)")
)

# Wrap Plots
fig2a <- patchwork::wrap_plots(fig2a, ncol = 1, guides = "collect", scale = "free_x") &
  labs(y = "Relative Abundance") &
  scale_y_percent(limits = c(0, 1.05)) &
  theme_biome_utils() &
  theme(
    legend.text = element_text(face = "italic", size = 9),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    axis.title.y = element_text(size = 9),
    axis.title.x = element_text(size = 8),
    strip.text = element_text(hjust = 0.5),
    legend.key.width = unit(0.3, "cm"), 
    legend.key.height = unit(0.3, "cm"),
    plot.title = element_text(size = 10, hjust = 0, face = "bold"
                              ),
    legend.title = element_blank(),
    legend.position = "bottom",
    panel.border = element_blank(),
    legend.box.margin = margin(0, 2, 0, 0, "cm")
  ) &
  coord_flip()

fig2a <- fig2a + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)

Figure 2A

Save Vibrio fischeri Relative Abundance Plot

pdf("fig2a.pdf", height = 6, width = 5)
par(mar=c(9,5,1,2), cex=1)
fig2a + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()

Define Fig 2B Sample Order

fig2b_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentA", "SymES114VentB", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")

Create Palette for Figure 2B

fig2b_palette <- c("Vibrionaceae" = "lightblue",
                 "Bradyrhizobiaceae" = "#9c8bb3",
                 "Methylobacteriaceae" = "#604f78",
                 "Rhizobiaceae" = "darkseagreen2",
                 "Rhodobacteraceae" = "darkseagreen3",
                 "o__Rhodobacterales" = "darkseagreen4",
                 "c__Proteobacteria" = "indianred1",
                 "c__Alphaproteobacteria" = "indianred3",
                 "Alteromonadaceae" = "lightgoldenrod1",
                 "c__Gammaproteobacteria" = "lightgoldenrod3",
                 "c__Actinomycetia" = "darkslategray4",
                 "c__Actinobacteria" = "darkslategray",
                 "Other Taxa" = "#d5bdaf",
                 "Unidentified Bacteria" = "blanchedalmond")

tax_palette_plot(fig2b_palette)

Define Taxa Order for Figure 2B

fig2b_taxorder <- c("Vibrionaceae",
                     "Bradyrhizobiaceae",
                     "Methylobacteriaceae",
                     "Rhizobiaceae",
                     "Rhodobacteraceae",
                     "o__Rhodobacterales",
                     "c__Proteobacteria",
                     "c__Alphaproteobacteria",
                     "Alteromonadaceae",
                     "c__Gammaproteobacteria",
                     "c__Actinomycetia",
                     "c__Actinobacteria",
                     "Other Taxa",
                     "Unidentified Bacteria")

Calculate Percent Non-Vibrio fischeri Abundance in Squid

es.hsp.fig2b <- es.hsp.2

# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.fig2b)
tax_data <- tax_table(es.hsp.fig2b)

# Identify which taxa belong to "Aliivibrio fischeri"
nonfischeri_indices <- which(tax_data[, "Genus"] != "Aliivibrio")

# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)

# Calculate the abundance of "fischeri" in each sample
nonfischeri_abundance <- colSums(otu_data[nonfischeri_indices, ])

# Calculate as a percentage
nonfischeri_percentage <- (nonfischeri_abundance / total_abundance) * 100

# Access the sample_data of the phyloseq object
es.hsp.fig2b.meta <- sample_data(es.hsp.fig2b)

# Add the fischeri_percentage as a new column
es.hsp.fig2b.meta$nonfischeri_percentage <- nonfischeri_percentage

# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.fig2b) <- es.hsp.fig2b.meta
head(sample_data(es.hsp.fig2b))
##               sample.type              treatment     squid.id dnaquant   color
## BacMixA bacterial culture mixed bacteria culture      culture    0.374 #D4BC4D
## BacMixB bacterial culture mixed bacteria culture      culture    0.266 #D4BC4D
## ES114A  bacterial culture          ES114 culture      culture    0.556 #138808
## ES114B  bacterial culture          ES114 culture      culture    0.300 #138808
## EsApoA     juvenile squid           apo juvenile apo juvenile    1.020 #536878
## EsApoB     juvenile squid           apo juvenile apo juvenile    2.080 #536878
##         is.neg reads.sample vibrionaceae_percentage  VibrionaceaeLabel
## BacMixA  FALSE       127601              94.8127366  BacMixA  (94.81%)
## BacMixB  FALSE       156985              94.4943784  BacMixB  (94.49%)
## ES114A   FALSE       146490              99.2825449   ES114A  (99.28%)
## ES114B   FALSE       186324              98.8423392   ES114B  (98.84%)
## EsApoA   FALSE        47636               0.2540096    EsApoA  (0.25%)
## EsApoB   FALSE        58711               1.2757405    EsApoB  (1.28%)
##         fischeri_percentage      FischeriLabel nonfischeri_percentage
## BacMixA         80.41394660  BacMixA  (80.41%)             19.5860534
## BacMixB         74.69694557  BacMixB  (74.70%)             25.3030544
## ES114A          99.28254488   ES114A  (99.28%)              0.7174551
## ES114B          98.83965565   ES114B  (98.84%)              1.1576608
## EsApoA           0.09446637    EsApoA  (0.09%)             99.9055336
## EsApoB           0.22823662    EsApoB  (0.23%)             99.7717634
# Extract the fischeri_percentage from the sample_data
nonfischeri_percent_data <- sample_data(es.hsp.fig2b)$nonfischeri_percentage

# Create a new label in the es.hsp.fig2b sample data that concatenates the sample name and the non-Vibrio fischeri percentage (rather than trying to put the non-Vibrio fischeri percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.fig2b), sprintf(" (%.2f%%)", nonfischeri_percent_data))

sample_data(es.hsp.fig2b)$NonFischeriLabel <- paste(sample_data(es.hsp.fig2b)$SAMPLE, new_labels)

Plot Non-Vibrio fischeri Relative Abundance

Figure 2B

Save Non-Vibrio fischeri Relative Abundance Plot

pdf("fig2b.pdf", height = 7, width = 5)
par(mar=c(9,5,1,2), cex=1)
fig2b + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()